rootdir="/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/sc3samples_whitelist"
setwd(rootdir)  # Set this to correct location
source("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/project/R/seurat_processing.R")
source("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/project/R/DE-analysis.R")
source("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/project/R/visualisation_tools_and_pathway_analysis.R")

Load the objects

merged_gene<-readRDS("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/sc3samples_whitelist/merge_integration/cell_cycle_regression/regressed_obj_gene.rds")
merged_iso<-readRDS("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/sc3samples_whitelist/merge_integration/cell_cycle_regression/regressed_obj_isoform.rds")

General venn diagram:

How many genes and transcripts overlap in our different clusters?

plot_venn(merged_gene,"gene")

plot_venn(merged_iso,"isoform")

DE - sample level

We’ll run the DE testing on the unintegrated assay.

DE_samples_iso<-DE_samples(merged_iso,
                     output_dir=paste0(rootdir,"/analyses"),
                     "", #to specify what we are working with in the output file name 
                     topn=15)

markers_sample_iso<-DE_samples_iso$markers
head(markers_sample_iso)
##                                                      p_val avg_log2FC pct.1
## ENST00000309268.11--EEF1A1                               0   18.20146 1.000
## ENSG00000196262.15-44796680-44801631-1--PPIA             0   17.34552 0.999
## ENSG00000118181.11-119015712-119018582-1--RPS25          0   16.58348 0.998
## ENSG00000281181.1-8437202-8437688-1--ENSG00000281181     0   16.06028 0.975
## ENSG00000186468.13-82276060-82278348-1--RPS23            0   15.70314 0.968
## ENSG00000169567.13-131159292-131165106-2--HINT1          0   15.50109 0.954
##                                                      pct.2 p_val_adj cluster
## ENST00000309268.11--EEF1A1                               0         0    ctrl
## ENSG00000196262.15-44796680-44801631-1--PPIA             0         0    ctrl
## ENSG00000118181.11-119015712-119018582-1--RPS25          0         0    ctrl
## ENSG00000281181.1-8437202-8437688-1--ENSG00000281181     0         0    ctrl
## ENSG00000186468.13-82276060-82278348-1--RPS23            0         0    ctrl
## ENSG00000169567.13-131159292-131165106-2--HINT1          0         0    ctrl
##                                                                                                      gene
## ENST00000309268.11--EEF1A1                                                     ENST00000309268.11--EEF1A1
## ENSG00000196262.15-44796680-44801631-1--PPIA                 ENSG00000196262.15-44796680-44801631-1--PPIA
## ENSG00000118181.11-119015712-119018582-1--RPS25           ENSG00000118181.11-119015712-119018582-1--RPS25
## ENSG00000281181.1-8437202-8437688-1--ENSG00000281181 ENSG00000281181.1-8437202-8437688-1--ENSG00000281181
## ENSG00000186468.13-82276060-82278348-1--RPS23               ENSG00000186468.13-82276060-82278348-1--RPS23
## ENSG00000169567.13-131159292-131165106-2--HINT1           ENSG00000169567.13-131159292-131165106-2--HINT1

Let’s have a look at the genes that have differentially expressed transcripts between the samples:

lapply(c("ctrl","S24","R"), function(x) {
  gene_list<-markers_sample_iso |> 
  filter(avg_log2FC>2 & cluster==x) |> 
  mutate(
      gene_name = sub(".*--", "",gene)) |>pull(gene_name)

run_enrichR(gene_list,x)
})
## Uploading data to Enrichr... Done.
##   Querying GO_Molecular_Function_2025... Done.
##   Querying GO_Cellular_Component_2025... Done.
##   Querying GO_Biological_Process_2025... Done.
##   Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.

## Uploading data to Enrichr... Done.
##   Querying GO_Molecular_Function_2025... Done.
##   Querying GO_Cellular_Component_2025... Done.
##   Querying GO_Biological_Process_2025... Done.
##   Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.

## Uploading data to Enrichr... Done.
##   Querying GO_Molecular_Function_2025... Done.
##   Querying GO_Cellular_Component_2025... Done.
##   Querying GO_Biological_Process_2025... Done.
##   Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.

## [[1]]

## 
## [[2]]

## 
## [[3]]

The genes involved in these pathway,etc… express different isoforms .

Same analysis on the gene-level:

DE_samples_gene<-DE_samples(merged_gene,
                     output_dir=paste0(rootdir,"/analyses"),
                     "3samples_gene", #to specify what we are working with in the output file name 
                     topn=10,
                     type="gene")

markers_sample_gene<-DE_samples_gene$markers
head(markers_sample_gene)
##                 p_val  avg_log2FC pct.1 pct.2 p_val_adj cluster            gene
## RPS28P7             0 -12.5357112 0.000 0.580         0    ctrl         RPS28P7
## ENSG00000280800     0 -13.7771634 0.000 0.508         0    ctrl ENSG00000280800
## MIF                 0 -14.4798635 0.000 0.470         0    ctrl             MIF
## UCHL1               0   0.8881663 0.989 0.525         0    ctrl           UCHL1
## KRT18               0 -10.8232339 0.002 0.465         0    ctrl           KRT18
## KRT19               0 -14.0837799 0.000 0.455         0    ctrl           KRT19

Let’s have a look at the genes that are differentially expressed between the samples:

lapply(c("ctrl","S24","R"), function(x) {
  gene_list<-markers_sample_gene |> 
  filter(avg_log2FC>2 & cluster==x) |>pull(gene)

run_enrichR(gene_list,x)
})
## Uploading data to Enrichr... Done.
##   Querying GO_Molecular_Function_2025... Done.
##   Querying GO_Cellular_Component_2025... Done.
##   Querying GO_Biological_Process_2025... Done.
##   Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.

## Uploading data to Enrichr... Done.
##   Querying GO_Molecular_Function_2025... Done.
##   Querying GO_Cellular_Component_2025... Done.
##   Querying GO_Biological_Process_2025... Done.
##   Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.

## Uploading data to Enrichr... Done.
##   Querying GO_Molecular_Function_2025... Done.
##   Querying GO_Cellular_Component_2025... Done.
##   Querying GO_Biological_Process_2025... Done.
##   Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.

## [[1]]

## 
## [[2]]

## 
## [[3]]

DE - cluster level

We will now use the cluster labels obtained after rpca integration and run DE analysis across these cluster to identify some cellular clones for example. We will also run pathway enrchment analysis on these DE’ed genes and transcripts.

dir.create(paste0(rootdir,"/analyses/DE_clusters"))
merged_gene<-readRDS("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/sc3samples_whitelist/merge_integration/cell_cycle_regression/regressed_obj_gene_integrated.rds")
merged_iso<-readRDS("/media/inserm-root/P4/Single_cell_rna_seq_analyses_eline/sc3samples_whitelist/merge_integration/cell_cycle_regression/regressed_obj_isoform_integrated.rds")
res_de_iso<-DE_clusters(merged_iso, # an integrated seurat object
                      outdir=paste0(rootdir,"/analyses/DE_clusters"),
                      topn=10,
                      type="isoform")

res_de_gene<-DE_clusters(merged_gene, # an integrated seurat object
                      outdir=paste0(rootdir,"/analyses/DE_clusters"),
                      topn=10,
                      type="gene")

## Focus on individual clusters At transcript-level, clusters 1 and 4 seem to stand out more. At gene-level we might want to focus on clusters 1 that stays present across samples and cluster 2 that seem to appear in the resistant cells. The following function extracts a list of DE’ed markers in a specified cluster,returns the list and plots Vln Plots for these markers

list_gene1<-extract_cluster_feature(num_cluster=1,
                                  marker_table_list=res_de_gene$markers, # the $markers output from DE_cluster()
                                  type="gene",
                                  cluster_names="rpca_clusters", # or "clusters" if the object is not integrated
                                  integrated=TRUE,
                                  seu_obj=merged_gene # used in DE_cluster()
                                  )

list_gene2<-extract_cluster_feature(num_cluster=2,
                                  marker_table_list=res_de_gene$markers, # the $markers output from DE_cluster()
                                  type="gene",
                                  cluster_names="rpca_clusters", # or "clusters" if the object is not integrated
                                  integrated=TRUE,
                                  seu_obj=merged_gene # used in DE_cluster()
                                  )

Some of these genes are indeed specific to a cluster but not all of them.

Do they have DE’ed transcripts in the 3 samples? For examples with the 10 first genes of cluster 1

num_cluster=1
type="gene"
 setwd(paste0(rootdir,"/analyses/cluster",num_cluster,"_",type))
lapply(list_gene1[1:10], function(x) {
  plot_feature_iso(merged_iso,
                   x,
                   sample_name = "",
                   output_dir = "./")
})
## [1] "plotted in .//featureplot_H4C3.png"
## [1] "plotted in .//featureplot_H3C4.png"
## [1] "plotted in .//featureplot_H3C15.png"
## [1] "plotted in .//featureplot_H1-4.png"
## [1] "plotted in .//featureplot_H1-3.png"
## [1] "plotted in .//featureplot_H1-5.png"
## [1] "plotted in .//featureplot_H4C4.png"
## [1] "plotted in .//featureplot_H3C2.png"
## [1] "plotted in .//featureplot_H2AC17.png"
## [1] "plotted in .//featureplot_CDK1.png"
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

Not really…

Can identify the biological function of cluster 1 with a pathway enrichment analysis?

#gene_list<-sub(".*--", "",)

run_enrichR(list_gene1[1:10],"cluster1 gene-level")
## Uploading data to Enrichr... Done.
##   Querying GO_Molecular_Function_2025... Done.
##   Querying GO_Cellular_Component_2025... Done.
##   Querying GO_Biological_Process_2025... Done.
##   Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.

Let’s now focus on DE’ed transcripts in clusters 1 and 4.

list1<-extract_cluster_feature(num_cluster=1,
                                  marker_table_list=res_de_iso$markers, # the $markers output from DE_cluster()
                                  type="isoform",
                                  cluster_names="rpca_clusters", # or "clusters" if the object is not integrated
                                  integrated=TRUE,
                                  seu_obj=merged_iso # used in DE_cluster()
                                  )

list1
##  [1] "ENST00000377803.4--H4C3"                        
##  [2] "ENST00000356476.3--H3C4"                        
##  [3] "ENST00000304218.6--H1-4"                        
##  [4] "ENST00000403683.2--H3C15"                       
##  [5] "ENSG00000175063.17-45814410-45816957-1--UBE2C"  
##  [6] "ENST00000244534.7--H1-3"                        
##  [7] "ENST00000331442.5--H1-5"                        
##  [8] "ENST00000614247.2--H4C4"                        
##  [9] "ENST00000359611.4--H2AC17"                      
## [10] "ENST00000621411.3--H3C2"                        
## [11] "ENST00000699425.1--UBE2T"                       
## [12] "ENSG00000153048.11-8854921-8869006-1--CARHSP1"  
## [13] "ENSG00000175063.17-45814410-45816952-1--UBE2C"  
## [14] "ENST00000343677.4--H1-2"                        
## [15] "ENSG00000077152.12-202331657-202335014-1--UBE2T"
## [16] "ENST00000333151.5--H2AC14"                      
## [17] "ENST00000646651.1--UBE2T"
list4<-extract_cluster_feature(num_cluster=4,
                                  marker_table_list=res_de_iso$markers, # the $markers output from DE_cluster()
                                  type="isoform",
                                  cluster_names="rpca_clusters", # or "clusters" if the object is not integrated
                                  integrated=TRUE,
                                  seu_obj=merged_iso # used in DE_cluster()
                                  )

Are these transcripts DE’ed in the samples?

setwd(paste0(rootdir,"/analyses/cluster",1,"_isoform"))
lapply(list1,function(x) {
  plot1<-FeaturePlot(merged_iso, features = x,combine=TRUE,split.by="sample") +theme(strip.text = element_text(size = 1)) #+labs(title = paste0("Differential transcript expression per sample for ",gene))
  
  ggsave(filename = paste0("featureplot",x,".png") ,
         plot = plot1, width=8,height=5,limitsize = FALSE)
  
  print(paste0("plotted in ",paste0("featureplot",x,".png") ))
  return(plot1) 
})
## [1] "plotted in featureplotENST00000377803.4--H4C3.png"
## [1] "plotted in featureplotENST00000356476.3--H3C4.png"
## [1] "plotted in featureplotENST00000304218.6--H1-4.png"
## [1] "plotted in featureplotENST00000403683.2--H3C15.png"
## [1] "plotted in featureplotENSG00000175063.17-45814410-45816957-1--UBE2C.png"
## [1] "plotted in featureplotENST00000244534.7--H1-3.png"
## [1] "plotted in featureplotENST00000331442.5--H1-5.png"
## [1] "plotted in featureplotENST00000614247.2--H4C4.png"
## [1] "plotted in featureplotENST00000359611.4--H2AC17.png"
## [1] "plotted in featureplotENST00000621411.3--H3C2.png"
## [1] "plotted in featureplotENST00000699425.1--UBE2T.png"
## [1] "plotted in featureplotENSG00000153048.11-8854921-8869006-1--CARHSP1.png"
## [1] "plotted in featureplotENSG00000175063.17-45814410-45816952-1--UBE2C.png"
## [1] "plotted in featureplotENST00000343677.4--H1-2.png"
## [1] "plotted in featureplotENSG00000077152.12-202331657-202335014-1--UBE2T.png"
## [1] "plotted in featureplotENST00000333151.5--H2AC14.png"
## [1] "plotted in featureplotENST00000646651.1--UBE2T.png"
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

Some of the transcripts seem to be specific to a sample.

Again, let’s try to identify the biological function of cluster 1 using pathway analysis:

gene_list1<-sub(".*--", "",list1)

run_enrichR(gene_list1,"cluster1 transcript-level")
## Uploading data to Enrichr... Done.
##   Querying GO_Molecular_Function_2025... Done.
##   Querying GO_Cellular_Component_2025... Done.
##   Querying GO_Biological_Process_2025... Done.
##   Querying Reactome_Pathways_2024... Done.
## Parsing results... Done.